import pandas as pd
import requests
from bs4 import BeautifulSoup
import yaml

class DataLoader:
    """Load data from local CSV files, scraped web pages, and JSON APIs."""

    # Seconds before an HTTP request is abandoned; requests has NO default
    # timeout, so without this a dead server hangs the caller forever.
    REQUEST_TIMEOUT = 30

    def __init__(self, config_path="config.yaml"):
        """Parse the YAML configuration file.

        Args:
            config_path: Path to a YAML file; parsed with ``yaml.safe_load``
                (safe against untrusted documents).
        """
        # Explicit encoding: the config may contain non-ASCII text and the
        # platform default encoding is not reliable.
        with open(config_path, "r", encoding="utf-8") as f:
            self.config = yaml.safe_load(f)

    def load_csv(self, file_path):
        """Load a local CSV file into a DataFrame."""
        return pd.read_csv(file_path)

    def fetch_web_data(self, url=None):
        """Scrape the first HTML table from a web page.

        (Example use: National Bureau of Statistics digital-economy data.)
        Falls back to the URL configured at ``data_sources.web.url`` when
        *url* is None.

        Returns:
            DataFrame whose columns come from the table's first row.

        Raises:
            requests.HTTPError: if the server returns a non-2xx status.
            ValueError: if the page contains no <table> or the table is empty.
        """
        if url is None:
            url = self.config["data_sources"]["web"]["url"]
        response = requests.get(url, timeout=self.REQUEST_TIMEOUT)
        # Fail loudly on an error response instead of parsing an error page.
        response.raise_for_status()
        soup = BeautifulSoup(response.text, 'html.parser')
        table = soup.find('table')
        if table is None:
            # Clear error instead of an AttributeError on table.find_all.
            raise ValueError(f"No <table> element found at {url}")
        data = []
        for row in table.find_all('tr'):
            # Header rows normally use <th>, not <td>; the original read only
            # <td>, which produced an empty header row and lost column labels.
            cells = row.find_all(['th', 'td'])
            data.append([cell.get_text(strip=True) for cell in cells])
        if not data:
            raise ValueError(f"Table at {url} contains no rows")
        return pd.DataFrame(data[1:], columns=data[0])

    def load_from_api(self, api_url):
        """Fetch JSON from an API endpoint into a DataFrame.

        (Example use: a green-energy data API.)

        Raises:
            requests.HTTPError: if the server returns a non-2xx status.
        """
        response = requests.get(api_url, timeout=self.REQUEST_TIMEOUT)
        response.raise_for_status()
        return pd.DataFrame(response.json())